### Rachel Porter
### 12/1/2022
### Replication file for Dem veterans' analysis

rm(list=ls())

# load libraries
library("cobalt")
library("WeightIt")
library("ebal")
library("jtools")
library("survey")
library("sensemakr")
library("stargazer")
library("MASS")
library("arm")
library("ggplot2")

# clear environment and set working directory
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")

# load data for male/female democrat analysis
master <- readRDS("vet_democrat_data.rds")

# subset data into strategic and not for balancing 
strategic <- subset(master, master$strategic == "Strategic")
not <- subset(master, master$strategic == "Not")

############################### NONVET PROFESSIONAL DEM #################################

# main findings; Model 1 with weights, Table A17
w.out.strat <- weightit(military_number ~ quality_cand + openrace + 
                          dem_pres_av + primary_type + year + 
                          vet_rescaled + installations + gender,
                          data = strategic, estimand = "ATT", method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                          data = strategic)
fit.strat <- svyglm(narrow_military ~ military_number + quality_cand + openrace + 
                          dem_pres_av + primary_type + year + vet_rescaled + 
                          installations + gender, design = d.w.strat, 
                          family=quasibinomial())

# appendix Model 1 without weights, Table A17
fit2 <- glm(narrow_military ~ military_number + quality_cand + openrace + 
                          dem_pres_av + primary_type + year + vet_rescaled + 
                          installations + gender, data = strategic, 
                          family=binomial())

# appendix Model 2 with weights, Table A17
w.out.strat3 <- weightit(military_number ~ as.factor(qual_chall) + openrace + 
                          dem_pres_av + primary_type + year + vet_rescaled + 
                          installations + gender, data = strategic, 
                          estimand = "ATT", method = "ebal")
d.w.strat3 <- svydesign(ids = ~1, weights = w.out.strat3$weights, 
                          data = strategic)
fit.strat3 <- svyglm(narrow_military ~ military_number + as.factor(qual_chall) + 
                          openrace + dem_pres_av + primary_type + year + 
                          vet_rescaled + installations + gender, 
                          design = d.w.strat3, family=quasibinomial())

# appendix Model 2 without weights, Table A17
fit3 <- glm(narrow_military ~ military_number + as.factor(qual_chall) + 
                          openrace + dem_pres_av + primary_type + year + 
                          vet_rescaled + installations + gender, 
                          data = strategic, family=binomial())

# appendix Model 3, Table A17
w.out.strat2 <- weightit(military_number ~ quality_cand + openrace + ss_party + 
                          primary_type + year + vet_pop + installations + gender,
                          data = strategic, estimand = "ATT", method = "ebal")
d.w.strat2 <- svydesign(ids = ~1, weights = w.out.strat2$weights,
                          data = strategic)
fit.strat2 <- svyglm(narrow_military ~ military_number + quality_cand + 
                          openrace + ss_party + primary_type + year + vet_pop + 
                          installations + gender, design = d.w.strat2, 
                          family=quasibinomial())

#################### 
#### APPENDIX TABLE A17
#################### 
stargazer(fit.strat, fit2, fit.strat3,fit3,fit.strat2,
                          star.char = c("*", "*"),
                          star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 5
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat$coef
vcv <- vcov(fit.strat)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables
military_number <- c(0,1)
vet_rescaled <- c(0.02,0.02)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(1,1)
quality_cand1 <- c(1,1)
installations <- c(2,2)
dem_pres_av <- c(55,55)
year2020 <- c(0,0)
gender1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, military_number, quality_cand1, openrace1, 
                    dem_pres_av, `primary_typeOpen Primary`, year2020, 
                    vet_rescaled, installations, gender1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(military_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_strat <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat, prob = .025)
hi <- quantile(pe_strat, prob = .975)

vet_vector <- c(mean(pe_strat), lo, hi)

#################### 
## Love plots, APPENDIX FIGURE 13
#################### 

w.out.strat <- weightit(military_number ~ quality_cand + openrace + 
                          dem_pres_av + primary_type + year + 
                          vet_rescaled + installations + gender,
                        data = strategic, estimand = "ATT", method = "ebal")
w.out.strat2 <- weightit(military_number ~ quality_cand + openrace + 
                          dem_pres_av + primary_type + year + 
                          vet_rescaled + installations + gender,
                        data = strategic, estimand = "ATT", method = "cbps")

new.names.strat = c(quality_cand = "Experienced Candidate", 
                    openrace = "Open Seat", 
                    dem_pres_av = "District Presidential Vote", 
                    `primary_type_Open Primary` = "Open Primary", 
                    year_2020 = "2020 Primary Election", 
                    installations = "# Mil. Installations", 
                    vet_rescaled = "% District Veteran Pop.", 
                    gender = "Female Candidate")

love.plot(military_number ~ quality_cand + openrace + dem_pres_av + 
          primary_type + year + vet_rescaled + installations + gender,
          data = strategic, estimand = "ATT",
          stats = c("mean.diffs"),
          weights = list(w1 = get.w(w.out.strat),
                         w2 = get.w(w.out.strat2)),
          var.order = "unadjusted",
          abs = TRUE,
          line = FALSE, 
          title = NULL,
          size =  5,
          threshold = c(mean.diffs = .05,
                        ks = .05),
          var.names = new.names.strat,
          colors = c("black", "black", "black"),
          shapes = c("triangle filled", "circle filled", "square filled"),
          sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
          limits = list(mean.diffs = c(0, .55),
                        ks = c(0, .55)),
          wrap = 20, grid = FALSE,
          position = "top",
          themes = list(theme(text = element_text(size=20)),
                        theme(text = element_text(size=20))))

rm(list=setdiff(ls(), c("not", "vet_vector", "pe_strat")))

############################### NONVET AMATEUR DEM #################################

# main findings; Model 1 with weights, Table A18
w.out.not <- weightit(military_number ~ openrace + dem_pres_av + primary_type + 
                        year + vet_rescaled + installations + gender, data = not, 
                        estimand = "ATT", method = "ebal")
d.w.am <- svydesign(ids = ~1, weights = w.out.not$weights,
                        data = not)
fit.am <- svyglm(narrow_military ~ military_number + openrace + dem_pres_av + 
                        primary_type + year + vet_rescaled + installations + 
                        gender, design = d.w.am, family=quasibinomial())

# appendix Model 1 without weights, Table A18
fit2 <- glm(narrow_military ~ military_number + openrace + dem_pres_av + 
                        primary_type + year + vet_rescaled + installations + 
                        gender, data = not, family=binomial())

# appendix Model 2, Table A18
w.out.not2 <- weightit(military_number ~ openrace + ss_party + primary_type + 
                        year + vet_pop + installations + gender, data = not, 
                        estimand = "ATT", method = "ebal")
d.w.am2 <- svydesign(ids = ~1, weights = w.out.not2$weights,
                        data = not)
fit.am2 <- svyglm(narrow_military ~ military_number + openrace + ss_party + 
                        primary_type + year + vet_pop + installations + gender,
                        design = d.w.am2, family=quasibinomial())

#################### 
#### APPENDIX TABLE A18
#################### 
stargazer(fit.am, fit2, fit.am2,
          star.char = c("*", "*"),
          star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 5
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.am$coef
vcv <- vcov(fit.am)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.am$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
military_number <- c(0,1)
openrace1 <- c(0,0)
vet_rescaled <- c(0.02,0.02)
`primary_typeOpen Primary` <- c(1,1)
year2020 <- c(0,0)
dem_pres_av <- c(55,55)
installations <- c(2,2)
gender1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, military_number, openrace1, dem_pres_av,
                    `primary_typeOpen Primary`, year2020, 
                    vet_rescaled, installations, gender1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute the predicted probabilities and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(military_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_not, prob = .025)
hi <- quantile(pe_not, prob = .975)

vet_vector <- rbind.data.frame(vet_vector, c(mean(pe_not), lo, hi))
colnames(vet_vector) <- c("pe", "lo", "hi")

# running ttest discussed in manuscript
t.test(pe_strat, pe_not)

# saving simulations to create Figure 4
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(vet_vector, file = "figure_5_dvet.rds")

#################### 
## Love plots, APPENDIX FIGURE 14
#################### 

w.out.not <- weightit(military_number ~ openrace + dem_pres_av + primary_type + 
                        year + vet_rescaled + installations + gender, data = not, 
                      estimand = "ATT", method = "ebal")
w.out.not2 <- weightit(military_number ~ openrace + dem_pres_av + primary_type + 
                        year + vet_rescaled + installations + gender, data = not, 
                      estimand = "ATT", method = "cbps")

new.names.not = c(openrace = "Open Seat", 
                  dem_pres_av = "District Presidential Vote", 
                  `primary_type_Open Primary` = "Open Primary", 
                  year_2020 = "2020 Primary Election", 
                  installations = "# Mil. Installations", 
                  vet_rescaled = "% District Veteran Pop.", 
                  gender = "Female Candidate")


love.plot(military_number ~ openrace + dem_pres_av + primary_type + year + 
          vet_rescaled + installations + gender, 
          data = not, estimand = "ATT",
          stats = c("mean.diffs"),
          weights = list(w1 = get.w(w.out.not),
                         w2 = get.w(w.out.not2)),
          var.order = "unadjusted",
          abs = TRUE,
          line = FALSE, 
          title = NULL,
          size =  5,
          threshold = c(mean.diffs = .05,
                        ks = .05),
          var.names = new.names.not,
          colors = c("black", "black", "black"),
          shapes = c("triangle filled", "circle filled", "square filled"),
          sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
          limits = list(mean.diffs = c(0, .55),
                        ks = c(0, .55)),
          wrap = 20, grid = FALSE,
          position = "top",
          themes = list(theme(text = element_text(size=20)),
                        theme(text = element_text(size=20))))
